#!/usr/bin/env python
# License: GPLv3 Copyright: 2014, Kovid Goyal <kovid at kovidgoyal.net>


import glob
import json
import os
import re
import shutil
import sys
from collections import defaultdict, namedtuple
from functools import partial
from itertools import chain

from calibre import prints
from calibre.constants import config_dir, filesystem_encoding, iswindows
from calibre.spell import parse_lang_code
from calibre.utils.config import JSONConfig
from calibre.utils.icu import capitalize
from calibre.utils.localization import _, get_lang, get_system_locale
from calibre.utils.resources import get_path as P

# Describes a dictionary found on disk: the locale it is primarily for, every
# locale it can serve, the paths of its .dic/.aff pair, whether it ships with
# the application, and (for user installed dictionaries) its name and id.
Dictionary = namedtuple('Dictionary', 'primary_locale locales dicpath affpath builtin name id')
# Same description once the files have been loaded; ``obj`` is the object
# returned by hunspell.Dictionary(). The typename matches the variable name so
# that repr() output distinguishes it from the unloaded Dictionary tuple.
LoadedDictionary = namedtuple('LoadedDictionary', 'primary_locale locales obj builtin name id')
# Persistent spell checking preferences
dprefs = JSONConfig('dictionaries/prefs.json')
dprefs.defaults['preferred_dictionaries'] = {}
dprefs.defaults['preferred_locales'] = {}
dprefs.defaults['user_dictionaries'] = [{'name':_('Default'), 'is_active':True, 'words':[]}]
# Sentinel distinguishing "no entry" from a stored None
not_present = object()


class UserDictionary:
    '''
    A named collection of user supplied words.

    ``words`` is a set of tuples built from the ``words`` keyword argument;
    elsewhere in this file the tuples are ``(word, langcode)`` pairs.
    '''

    __slots__ = ('is_active', 'name', 'words')

    def __init__(self, **kwargs):
        # Requires the keys 'name', 'is_active' and 'words'; any other keys are ignored
        self.name = kwargs['name']
        self.is_active = kwargs['is_active']
        # Entries may arrive as lists, convert each to a hashable tuple
        self.words = {tuple(entry) for entry in kwargs['words']}

    def serialize(self):
        ''' Return a plain dict that __init__() accepts back as keyword arguments. '''
        return dict(name=self.name, is_active=self.is_active, words=list(self.words))


# Module level caches, populated on first use by builtin_dictionaries() and
# custom_dictionaries() respectively
_builtins = _custom = None


def builtin_dictionaries():
    '''
    Return the frozenset of Dictionary tuples for the bundled dictionaries.

    Every ``*/locales`` file under the (non user-overridable) ``dictionaries``
    resource directory lists the locales a dictionary serves, one per line.
    The first non-empty line is the primary locale and also names the
    .dic/.aff files. The result is computed once and cached.
    '''
    global _builtins
    if _builtins is not None:
        return _builtins
    root = P('dictionaries', allow_user_override=False)
    found = []
    for locales_file in glob.glob(os.path.join(root, '*/locales')):
        with open(locales_file, 'rb') as stream:
            raw = stream.read()
        codes = [line for line in raw.decode('utf-8').splitlines() if line]
        primary_code = codes[0]
        folder = os.path.dirname(locales_file)
        found.append(Dictionary(
            parse_lang_code(primary_code),
            frozenset(parse_lang_code(code) for code in codes),
            os.path.join(folder, f'{primary_code}.dic'),
            os.path.join(folder, f'{primary_code}.aff'),
            True, None, None))
    _builtins = frozenset(found)
    return _builtins


def catalog_online_dictionaries():
    '''
    Return the online dictionary catalog as a list of dicts with the keys
    ``primary_locale``, ``name`` and ``directory``.

    The bundled catalog is read first, then entries from the possibly user
    overridden copy are merged over it on a best effort basis.
    '''
    catalog = json.loads(P('dictionaries/online-catalog.json', allow_user_override=False, data=True))
    try:
        overrides = json.loads(P('dictionaries/online-catalog.json', data=True))
        catalog.update(overrides)
    except Exception:
        # A missing or broken override must not hide the bundled catalog
        pass
    return [
        {'primary_locale':parse_lang_code(lang), 'name':lang,'directory':directory}
        for lang, directory in catalog.items()
    ]


def custom_dictionaries(reread=False):
    '''
    Return the frozenset of Dictionary tuples for user installed dictionaries.

    Each ``*/locales`` file in the ``dictionaries`` folder of the config
    directory holds the dictionary name on its first non-empty line followed
    by one locale per line, the first of which is the primary locale.
    Dictionaries with fewer than two lines, or whose primary locale has no
    country code, are skipped. The result is cached; pass ``reread=True`` to
    rescan the disk.
    '''
    global _custom
    if _custom is not None and not reread:
        return _custom
    found = []
    pattern = os.path.join(config_dir, 'dictionaries', '*/locales')
    for locales_file in glob.glob(pattern):
        with open(locales_file, 'rb') as stream:
            raw = stream.read()
        lines = [line for line in raw.decode('utf-8').splitlines() if line]
        if len(lines) < 2:
            # Need at least a name and one locale
            continue
        name, codes = lines[0], lines[1:]
        primary_code = codes[0]
        folder = os.path.dirname(locales_file)
        primary = parse_lang_code(primary_code)
        if primary.countrycode is None:
            continue
        parsed = (parse_lang_code(code) for code in codes)
        found.append(Dictionary(
            primary,
            frozenset(loc for loc in parsed if loc.countrycode is not None),
            os.path.join(folder, f'{primary_code}.dic'),
            os.path.join(folder, f'{primary_code}.aff'),
            False, name, os.path.basename(folder)))
    _custom = frozenset(found)
    return _custom


# English falls back to en-US unless the system locale is English for one of
# the countries listed below, in which case that country's variant is used.
default_en_locale = 'en-US'
try:
    ul = parse_lang_code(get_system_locale() or 'en-US')
except ValueError:
    # Unparseable system locale, keep the en-US default
    ul = None
if ul is not None and ul.langcode == 'eng':
    if ul.countrycode in ('GB', 'BS', 'BZ', 'GH', 'IE', 'IN', 'JM', 'NZ', 'TT'):
        default_en_locale = 'en-' + ul.countrycode
# Locale to try first, per language, when only the language is known
default_preferred_locales = {
    'eng': default_en_locale,
    'deu': 'de-DE',
    'spa': 'es-ES',
    'fra': 'fr-FR',
}


def best_locale_for_language(langcode):
    '''
    Return the parsed locale to prefer for ``langcode``, or None.

    The user's ``preferred_locales`` preference wins; the module level
    defaults are only consulted when the preference has no entry at all.
    '''
    fallback = default_preferred_locales.get(langcode)
    chosen = dprefs['preferred_locales'].get(langcode, fallback)
    if chosen is None:
        return None
    return parse_lang_code(chosen)


def preferred_dictionary(locale):
    '''
    Return the value stored in the ``preferred_dictionaries`` preference for
    ``locale``, or None when there is none. Preference keys are language code
    strings and are parsed before being compared with ``locale``.
    '''
    by_locale = {}
    for code, dictionary_id in dprefs['preferred_dictionaries'].items():
        by_locale[parse_lang_code(code)] = dictionary_id
    return by_locale.get(locale)


def remove_dictionary(dictionary):
    '''
    Delete a user installed dictionary from disk and drop every preference
    that pointed at it.

    :raises ValueError: if ``dictionary`` is a builtin dictionary
    '''
    if dictionary.builtin:
        raise ValueError('Cannot remove builtin dictionaries')
    # The whole folder containing the .dic file belongs to this dictionary
    shutil.rmtree(os.path.dirname(dictionary.dicpath))
    kept = {}
    for code, dictionary_id in dprefs['preferred_dictionaries'].items():
        if dictionary_id != dictionary.id:
            kept[code] = dictionary_id
    dprefs['preferred_dictionaries'] = kept


def rename_dictionary(dictionary, name):
    '''
    Change the display name of a user installed dictionary.

    The name is stored as the first line of the ``locales`` file that lives
    next to the dictionary's .dic file. After rewriting that line the cache
    of custom dictionaries is refreshed.

    :raises ValueError: if ``dictionary`` is builtin (the first line of a
        builtin ``locales`` file is its primary locale, not a name, so
        rewriting it would break the dictionary), or if ``name`` is blank or
        spans several lines (either would shift the locale lines when the
        file is read back)
    '''
    if dictionary.builtin:
        raise ValueError('Cannot rename builtin dictionaries')
    if not name.strip() or len(name.splitlines()) != 1:
        raise ValueError('The dictionary name must be a single, non-empty line')
    lf = os.path.join(os.path.dirname(dictionary.dicpath), 'locales')
    with open(lf, 'r+b') as f:
        lines = f.read().splitlines()
        # Replace the first line, or insert one if the file is empty
        lines[:1] = [name.encode('utf-8')]
        f.seek(0)
        f.truncate()
        f.write(b'\n'.join(lines))
    custom_dictionaries(reread=True)


def get_dictionary(locale, exact_match=False):
    '''
    Pick the Dictionary to use for ``locale``, or return None.

    Dictionaries serving exactly ``locale`` are considered first. Unless
    ``exact_match`` is set, the search then widens to the preferred locale
    for the language and finally to any dictionary for the language.
    '''
    preferred = preferred_dictionary(locale)
    # Collect, keyed by id, every dictionary that serves this exact locale.
    # Within a collection, dictionaries whose primary locale matches are
    # recorded before those that merely list the locale.
    candidates = {}
    for pool in (custom_dictionaries(), builtin_dictionaries()):
        for entry in pool:
            if entry.primary_locale == locale:
                candidates[entry.id] = entry
        for entry in pool:
            if entry.id in candidates:
                continue
            if any(q == locale for q in entry.locales):
                candidates[entry.id] = entry

    # The stored preference decides when it names a candidate. With no
    # preference stored this lookup uses None, which is the id of builtin
    # dictionaries, so a matching builtin is returned here.
    if preferred in candidates:
        return candidates[preferred]
    # Otherwise take the candidate with the smallest id, a None id sorting last
    ranked = sorted(candidates, key=lambda x: (1, None) if x is None else (0, x))
    if ranked:
        return candidates[ranked[0]]

    if exact_match:
        return

    # Nothing serves the exact locale, so match on language alone, starting
    # with the locale preferred for this language
    fallback_locale = best_locale_for_language(locale.langcode)
    if fallback_locale is not None:
        found = get_dictionary(fallback_locale, exact_match=True)
        if found is not None:
            return found

    # Last resort: the first dictionary, ordered by name, for the language,
    # looking at user installed dictionaries before builtin ones
    for pool in (custom_dictionaries(), builtin_dictionaries()):
        for entry in sorted(pool, key=lambda d: d.name or ''):
            if entry.primary_locale.langcode == locale.langcode:
                return entry


def load_dictionary(dictionary):
    '''
    Load the .dic/.aff files described by ``dictionary`` with hunspell and
    return the corresponding LoadedDictionary.
    '''
    from calibre_extensions import hunspell

    def fix_path(path):
        # hunspell is given absolute text paths; on Windows the \\?\ prefix is added
        text = path.decode(filesystem_encoding) if isinstance(path, bytes) else path
        text = os.path.abspath(text)
        return fr'\\?\{text}' if iswindows else text

    dic = fix_path(dictionary.dicpath)
    aff = fix_path(dictionary.affpath)
    engine = hunspell.Dictionary(dic, aff)
    return LoadedDictionary(
        dictionary.primary_locale, dictionary.locales, engine,
        dictionary.builtin, dictionary.name, dictionary.id)


class Dictionaries:
    '''
    Spell checking front end combining loaded hunspell dictionaries, the
    user's own word lists and a set of ignored words.

    Ignored words and user dictionary entries are keyed by
    ``(word, langcode)``, while ``word_cache`` memoises the answers of
    :meth:`recognized` per ``(word, locale)``. Any change to the former
    therefore drops the cached answers for every locale of that language.

    :meth:`initialize` has to be called before any method that consults the
    user dictionaries, as it is what creates the active/inactive lists.
    '''

    def __init__(self):
        # Runs of ASCII hyphens and/or U+2010 HYPHEN
        self.remove_hyphenation = re.compile(r'[\u2010-]+')
        # Used by recognized() to accept words starting like a negative number.
        # NOTE(review): the '+' is inside the character class, so this matches
        # '-' followed by one of '.', a digit or '+'; confirm that is intended.
        self.negative_pat = re.compile(r'-[.\d+]')
        # Punctuation at which suggestions() tries to split run-together words
        self.fix_punctuation_pat = re.compile(r'''[:.]''')
        self.dictionaries = {}  # locale -> LoadedDictionary, or None if unavailable
        self.word_cache = {}  # (word, locale) -> bool
        self.ignored_words = set()  # {(word, langcode)}
        self.added_user_words = {}  # not read anywhere in this class
        try:
            self.default_locale = parse_lang_code(get_lang())
        except ValueError:
            self.default_locale = parse_lang_code('en-US')
        self.ui_locale = self.default_locale

    def initialize(self, force=False):
        ''' Read the user dictionaries from the preferences if that has not
        happened yet, or unconditionally when ``force`` is set. '''
        if force or not hasattr(self, 'active_user_dictionaries'):
            self.read_user_dictionaries()

    def clear_caches(self):
        ''' Forget all loaded dictionaries and all cached recognition results. '''
        self.dictionaries.clear()
        self.word_cache.clear()

    def clear_ignored(self):
        self.ignored_words.clear()

    def _forget_cached(self, pairs):
        ''' Drop the cached recognition result of every ``(word, langcode)``
        in the set ``pairs``, for all cached locales of that language. '''
        stale = [key for key in self.word_cache if (key[0], key[1].langcode) in pairs]
        for key in stale:
            del self.word_cache[key]

    def _loaded_for_language(self, langcode):
        ''' Yield the loaded dictionaries whose primary locale has ``langcode``. '''
        for d in self.dictionaries.values():
            if d and getattr(d.primary_locale, 'langcode', None) == langcode:
                yield d

    def dictionary_for_locale(self, locale):
        '''
        Return the LoadedDictionary for ``locale``, loading it on first use,
        or None when no dictionary is available. The words of all active user
        dictionaries for the language are added to a freshly loaded dictionary.
        '''
        try:
            # A cached None means an earlier lookup found no dictionary
            return self.dictionaries[locale]
        except KeyError:
            pass
        ans = get_dictionary(locale)
        if ans is not None:
            ans = load_dictionary(ans)
            for ud in self.active_user_dictionaries:
                for word, langcode in ud.words:
                    if langcode == locale.langcode:
                        try:
                            ans.obj.add(word)
                        except Exception:
                            # not critical since all it means is that the word won't show up in suggestions
                            prints(f'Failed to add the word {word!r} to the dictionary for {locale}', file=sys.stderr)
        self.dictionaries[locale] = ans
        return ans

    def ignore_word(self, word, locale):
        ''' Treat ``word`` as correctly spelled in the language of ``locale``. '''
        pair = (word, locale.langcode)
        self.ignored_words.add(pair)
        # Other locales of the same language may hold a stale negative answer
        self._forget_cached({pair})
        self.word_cache[(word, locale)] = True

    def unignore_word(self, word, locale):
        ''' Undo :meth:`ignore_word` for the language of ``locale``. '''
        pair = (word, locale.langcode)
        self.ignored_words.discard(pair)
        self._forget_cached({pair})

    def is_word_ignored(self, word, locale):
        return (word, locale.langcode) in self.ignored_words

    @property
    def all_user_dictionaries(self):
        ''' Iterator over the active, then the inactive, user dictionaries. '''
        return chain(self.active_user_dictionaries, self.inactive_user_dictionaries)

    def user_dictionary(self, name):
        ''' Return the first user dictionary called ``name`` or None. '''
        for ud in self.all_user_dictionaries:
            if ud.name == name:
                return ud

    def read_user_dictionaries(self):
        ''' (Re)build the active and inactive lists from the preferences. '''
        self.active_user_dictionaries = []
        self.inactive_user_dictionaries = []
        for d in dprefs['user_dictionaries'] or dprefs.defaults['user_dictionaries']:
            d = UserDictionary(**d)
            (self.active_user_dictionaries if d.is_active else self.inactive_user_dictionaries).append(d)

    def mark_user_dictionary_as_active(self, name, is_active=True):
        '''
        Activate or deactivate the user dictionary called ``name`` and save.

        The dictionary is also moved to the matching active/inactive list and
        the caches are cleared, so the change takes effect immediately rather
        than only after the user dictionaries are read again.

        :return: False if no such dictionary exists, True otherwise
        '''
        d = self.user_dictionary(name)
        if d is None:
            return False
        d.is_active = is_active
        if is_active:
            origin, target = self.inactive_user_dictionaries, self.active_user_dictionaries
        else:
            origin, target = self.active_user_dictionaries, self.inactive_user_dictionaries
        if any(x is d for x in origin):
            origin[:] = [x for x in origin if x is not d]
            target.append(d)
            # Loaded dictionaries and cached answers reflect the old active set
            self.clear_caches()
        self.save_user_dictionaries()
        return True

    def save_user_dictionaries(self):
        dprefs['user_dictionaries'] = [d.serialize() for d in self.all_user_dictionaries]

    def add_user_words(self, words, langcode):
        ''' Add ``words`` to every loaded dictionary for ``langcode``. '''
        for d in self._loaded_for_language(langcode):
            for word in words:
                d.obj.add(word)

    def remove_user_words(self, words, langcode):
        ''' Remove ``words`` from every loaded dictionary for ``langcode``. '''
        for d in self._loaded_for_language(langcode):
            for word in words:
                d.obj.remove(word)

    def add_to_user_dictionary(self, name, word, locale):
        '''
        Add to the user dictionary called ``name``.

        ``word`` is either a single word, stored for the language of
        ``locale``, or a set/frozenset of ``(word, langcode)`` tuples which is
        merged in as is.

        :return: True if the user dictionary grew, False otherwise
        :raises ValueError: if there is no user dictionary called ``name``
        '''
        ud = self.user_dictionary(name)
        if ud is None:
            raise ValueError(f'Cannot add to the dictionary named: {name} as no such dictionary exists')
        count_before = len(ud.words)
        if isinstance(word, (set, frozenset)):
            ud.words |= word
            self.add_user_words({x[0] for x in word}, locale.langcode)
            # Covers both the stored pairs and the words handed to hunspell above
            touched = set(word) | {(x[0], locale.langcode) for x in word}
        else:
            ud.words.add((word, locale.langcode))
            self.add_user_words((word,), locale.langcode)
            touched = {(word, locale.langcode)}
        if len(ud.words) > count_before:
            self.save_user_dictionaries()
            self._forget_cached(touched)
            return True
        return False

    def remove_from_user_dictionaries(self, word, locale):
        ''' Remove ``word`` from all active user dictionaries for the language
        of ``locale``. Returns True if anything was removed. '''
        key = (word, locale.langcode)
        changed = False
        for ud in self.active_user_dictionaries:
            if key in ud.words:
                changed = True
                ud.words.discard(key)
        if changed:
            self._forget_cached({key})
            self.save_user_dictionaries()
            self.remove_user_words((word,), locale.langcode)
        return changed

    def remove_from_user_dictionary(self, name, words):
        '''
        Remove ``words``, an iterable of ``(word, locale)`` pairs, from the
        user dictionaries called ``name``. Returns True if anything was removed.
        '''
        keys = [(w, l.langcode) for w, l in words]
        removals = defaultdict(set)  # langcode -> words actually removed
        for d in self.all_user_dictionaries:
            if d.name != name:
                continue
            for key in keys:
                if key in d.words:
                    d.words.discard(key)
                    removals[key[1]].add(key[0])
        changed = bool(removals)
        if changed:
            self._forget_cached(set(keys))
            for langcode, removed in removals.items():
                self.remove_user_words(removed, langcode)
            self.save_user_dictionaries()
        return changed

    def word_in_user_dictionary(self, word, locale):
        ''' Return the name of the first active user dictionary containing
        ``word`` for the language of ``locale``, or None. '''
        key = (word, locale.langcode)
        for ud in self.active_user_dictionaries:
            if key in ud.words:
                return ud.name

    def create_user_dictionary(self, name):
        ''' Create a new, empty, active user dictionary.

        :raises ValueError: if a user dictionary called ``name`` already exists
        '''
        if name in {d.name for d in self.all_user_dictionaries}:
            raise ValueError(f'A dictionary named {name} already exists')
        d = UserDictionary(name=name, is_active=True, words=())
        self.active_user_dictionaries.append(d)
        self.save_user_dictionaries()

    def remove_user_dictionary(self, name):
        ''' Delete every user dictionary called ``name``. Returns True if any
        was deleted, in which case the caches are cleared as well. '''
        changed = False
        for group in (self.active_user_dictionaries, self.inactive_user_dictionaries):
            kept = [d for d in group if d.name != name]
            if len(kept) != len(group):
                group[:] = kept
                changed = True
        if changed:
            self.save_user_dictionaries()
            self.clear_caches()
        return changed

    def rename_user_dictionary(self, name, new_name):
        ''' Rename every user dictionary called ``name``. Returns True if any
        was renamed. '''
        changed = False
        for d in self.all_user_dictionaries:
            if d.name == name:
                d.name = new_name
                changed = True
        if changed:
            self.save_user_dictionaries()
        return changed

    def recognized(self, word, locale=None):
        '''
        Return True if ``word`` counts as correctly spelled for ``locale``
        (the default locale when omitted).

        A word is accepted when it is ignored, is in an active user
        dictionary, is recognized by the locale's dictionary, matches
        ``negative_pat``, or when no dictionary is available for the locale
        at all. Answers are cached.
        '''
        locale = locale or self.default_locale
        key = (word, locale)
        ans = self.word_cache.get(key, None)
        if ans is None:
            lkey = (word, locale.langcode)
            ans = False
            if lkey in self.ignored_words:
                ans = True
            else:
                for ud in self.active_user_dictionaries:
                    if lkey in ud.words:
                        ans = True
                        break
                else:
                    d = self.dictionary_for_locale(locale)
                    if d is not None:
                        try:
                            ans = d.obj.recognized(word.replace('\u2010', '-'))
                        except ValueError:
                            pass
                    else:
                        # Without a dictionary nothing can be flagged as wrong
                        ans = True
            if ans is False and self.negative_pat.match(word) is not None:
                ans = True
            self.word_cache[key] = ans
        return ans

    def suggestions(self, word, locale=None):
        '''
        Return spelling suggestions for ``word`` in ``locale`` (the default
        locale when omitted); empty when no dictionary is available.

        Besides what the dictionary offers, the word with its hyphens removed
        is put first when that is a recognized word; failing that, a word
        containing ':' or '.' is offered split after the punctuation when both
        halves are recognized in ``locale``.
        '''
        locale = locale or self.default_locale
        d = self.dictionary_for_locale(locale)
        has_unicode_hyphen = '\u2010' in word
        ans = ()

        def add_suggestion(w, ans):
            # Put w first, removing any other occurrence of it
            return (w,) + tuple(x for x in ans if x != w)

        if d is not None:
            try:
                ans = d.obj.suggest(str(word).replace('\u2010', '-'))
            except ValueError:
                pass
            else:
                dehyphenated_word = self.remove_hyphenation.sub('', word)
                if len(dehyphenated_word) != len(word) and self.recognized(dehyphenated_word, locale):
                    # Ensure the de-hyphenated word is present and is the first suggestion
                    ans = add_suggestion(dehyphenated_word, ans)
                else:
                    m = self.fix_punctuation_pat.search(word)
                    if m is not None:
                        w1, w2 = word[:m.start()], word[m.end():]
                        # Both halves must be words of the requested locale,
                        # not of the default one
                        if self.recognized(w1, locale) and self.recognized(w2, locale):
                            fw = w1 + m.group() + ' ' + w2
                            ans = add_suggestion(fw, ans)
                            if capitalize(w2) != w2:
                                fw = w1 + m.group() + ' ' + capitalize(w2)
                                ans = add_suggestion(fw, ans)

        if has_unicode_hyphen:
            # Hand back suggestions with the same kind of hyphen the caller used
            ans = tuple(w.replace('-', '\u2010') for w in ans)
        return ans


def build_test():
    ''' Smoke test: the word "recognized" must be accepted for the locale
    parsed from 'en'. Raises AssertionError otherwise. '''
    checker = Dictionaries()
    checker.initialize()
    english = parse_lang_code('en')
    if checker.recognized('recognized', locale=english):
        return
    raise AssertionError('The word recognized was not recognized')


def find_tests():
    ''' Build and return the unittest suite for the spell checking dictionaries. '''
    import unittest

    class TestDictionaries(unittest.TestCase):

        def setUp(self):
            # Bind the en-GB locale so the tests only have to pass the word
            checker = Dictionaries()
            checker.initialize()
            british = parse_lang_code('en-GB')
            self.recognized = partial(checker.recognized, locale=british)
            self.suggestions = partial(checker.suggestions, locale=british)

        def ar(self, w):
            # Assert that w is recognized
            if self.recognized(w):
                return
            raise AssertionError(f'The word {w!r} was not recognized')

        def test_dictionaries(self):
            for w in ('recognized', 'one-half', 'one\u2010half'):
                self.ar(w)
            spanish = load_dictionary(get_dictionary(parse_lang_code('es-ES'))).obj
            self.assertTrue(spanish.recognized('Ahí'))
            self.assertIn('one\u2010half', self.suggestions('oone\u2010half'))
            # Looking up by bare language code only has to load without error
            spanish = load_dictionary(get_dictionary(parse_lang_code('es'))).obj
            self.assertIn('adequately', self.suggestions('ade-quately'))
            self.assertIn('magic. Wand', self.suggestions('magic.wand'))
            self.assertIn('List', self.suggestions('Lis𝑘t'))

    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(TestDictionaries)


def test():
    ''' Run the suite from find_tests() through calibre's run_cli test runner. '''
    from calibre.utils.run_tests import run_cli
    suite = find_tests()
    run_cli(suite)


# Executing this module as a script runs its unit tests
if __name__ == '__main__':
    test()
